 //-----------------------------------------------------------------------------
//
//Copyright(c) 2020, ThorsianWay Technologies Co, Ltd
//All rights reserved.
//
//IP Name       :   pixel_shader
//File Name     :   fragment_tex_shading.v
//Module name   :   fragment_tex_shading
//Full name     :   fragment texturing 
//
//Author        :   zha daolu
//Email         :   
//Date          :   2020/5/13
//Version       :   V1.00
//
//Abstract      :   
//                  
//Called  by    :   GPU
//
//Modification history
//-----------------------------------------------------
//1.00: initial version 
//
//-----------------------------------------------------------------------------

//-----------------------------
//DEFINE MACRO
//----------------------------- 


`include "gpu_bus.sv"
//-----------------------------------------------------------------------------
// fragment_tex_shading
//
// Structural top level of the fragment texturing stage. It contains no logic
// of its own apart from two continuous assignments; everything else is wiring
// between five sub-blocks:
//
//   u_uv_translate   : transforms (s, t) with the texture matrix A..H and
//                      produces (s_trans, t_trans).
//   u_tex_uv_cal     : converts (s_trans, t_trans) to texture-space (u, v)
//                      using the wrap modes and the texture size, and writes
//                      the result into u_uv_fifo.
//   u_xy_fifo        : carries the per-fragment side data (x, y, z, fog
//                      coordinates, RGBA colour); written on fragment_valid.
//   u_uv_fifo        : carries (u, v) from u_tex_uv_cal to u_texel_generate.
//   u_texel_generate : drives the read strobes of both FIFOs, issues texel
//                      addresses to the cache and drives the module outputs.
//   u_texture_cache  : texel cache connected to the memory bus `src`.
//
// Back-pressure:
//   fragment_tex_shading_busy is high when either FIFO reports almost-full.
//   fragment_out_valid is the valid from u_texel_generate gated by ~busy_in.
//
// Parameters:
//   CACHE_ADDR_BITS : width of tex_addr and of the cache-line addresses.
//   CACHE_BUS_WIDTH : not referenced inside this module.
//   UV_PRECISION    : forwarded to u_uv_translate and u_tex_uv_cal.
//-----------------------------------------------------------------------------
module fragment_tex_shading #(
    parameter CACHE_ADDR_BITS  = 32,
    parameter CACHE_BUS_WIDTH  = 64,
    parameter UV_PRECISION = 28
)
(
    input clk,                             //input clock
    input rst_n,                           //input reset, low active
    input busy_in,                         //input busy from the downstream stage, high active
    input [31:0] x,                        //input fragment x coordinate, fix point 1.15.16
    input [31:0] y,                        //input fragment y coordinate, fix point 1.15.16
    input [31:0] z,                        //input fragment z coordinate, fix point 1.15.16
    input [31:0] s,                        //input fragment s texture coordinate, fix point 1.15.16
    input [31:0] t,                        //input fragment t texture coordinate, fix point 1.15.16
    input [31:0] matrix_A,                 //input texture transform matrix, fix point 1.15.16
    input [31:0] matrix_B,                 //                                          matrix_A matrix_B matrix_C
    input [31:0] matrix_C,                 //   [s_trans' t_trans' w_trans'] =[s t 1] [matrix_D matrix_E matrix_F]
    input [31:0] matrix_D,                 //                                          matrix_G matrix_H    1
    input [31:0] matrix_E,                 //   s_trans = s_trans'/w_trans'
    input [31:0] matrix_F,                 //   t_trans = t_trans'/w_trans'
    input [31:0] matrix_G,                 //
    input [31:0] matrix_H,                 //
    input [31:0] fog_x,                    //input fragment x fog coordinate
    input [31:0] fog_y,                    //input fragment y fog coordinate
    input [31:0] fog_z,                    //input fragment z fog coordinate
    input [7:0] r,                         //input fragment color component r
    input [7:0] g,                         //input fragment color component g
    input [7:0] b,                         //input fragment color component b
    input [7:0] a,                         //input fragment color component a
    input fragment_valid,                  //input fragment valid, high active
    input [CACHE_ADDR_BITS-1:0] tex_addr,  //input texture address
    input [31:0] tex_width,                //input texture width  (only bits [23:0] reach u_tex_uv_cal)
    input [31:0] tex_height,               //input texture height (only bits [23:0] reach u_tex_uv_cal)
    input [1:0]  wrap_mode_s,              //input texture wrap mode for s direction
    input [1:0]  wrap_mode_t,              //input texture wrap mode for t direction
    gpu_bus      src[4],                   //memory bus, four interface instances handed to the texture cache
    output fragment_tex_shading_busy,      //output busy, indicates this module cannot accept new input, high active
    output [31:0] x_out,                   //output fragment x coordinate, fix point 1.15.16
    output [31:0] y_out,                   //output fragment y coordinate, fix point 1.15.16
    output [31:0] z_out,                   //output fragment z coordinate, fix point 1.15.16
    output [31:0] fog_x_out,               //output fragment x fog coordinate
    output [31:0] fog_y_out,               //output fragment y fog coordinate
    output [31:0] fog_z_out,               //output fragment z fog coordinate
    output [7:0] r_out,                    //output fragment color component r
    output [7:0] g_out,                    //output fragment color component g
    output [7:0] b_out,                    //output fragment color component b
    output [7:0] a_out,                    //output fragment color component a
    output [7:0] r_tex_out,                //output fragment texture color component r
    output [7:0] g_tex_out,                //output fragment texture color component g
    output [7:0] b_tex_out,                //output fragment texture color component b
    output [7:0] a_tex_out,                //output fragment texture color component a
    output fragment_out_valid              //output fragment valid, high active
);

//-----------------------------
//uv path: u_uv_translate -> u_tex_uv_cal -> u_uv_fifo -> u_texel_generate
//-----------------------------
wire [31:0] s_trans;           //transformed s, from u_uv_translate to u_tex_uv_cal
wire [31:0] t_trans;           //transformed t, from u_uv_translate to u_tex_uv_cal
wire        st_trans_en;       //transformed st valid
wire        tex_uv_cal_busy;   //busy from u_tex_uv_cal back to u_uv_translate

wire [31:0] u;                 //u = s_trans * texture width,  texture space, fix point 24.8, from u_tex_uv_cal to uv fifo
wire [31:0] v;                 //v = t_trans * texture height, texture space, fix point 24.8, from u_tex_uv_cal to uv fifo
wire        uv_fifo_wr;        //uv fifo write, high active, from u_tex_uv_cal
wire        uv_fifo_rd;        //uv fifo read, high active, from u_texel_generate
wire [31:0] u_out;             //uv fifo output u, to u_texel_generate
wire [31:0] v_out;             //uv fifo output v, to u_texel_generate
wire        uv_fifo_empty;     //uv fifo empty
wire        uv_fifo_prog_full; //uv fifo almost full (driven by the fifo almost_full port)

//-----------------------------
//xy path: fragment side data carried alongside the uv path
//-----------------------------
wire        xy_fifo_rd;        //xy fifo read, from u_texel_generate
wire        xy_fifo_full;      //xy fifo full (connected but not used in this module)
wire        xy_fifo_empty;     //xy fifo empty
wire        xy_fifo_prog_full; //xy fifo almost full (driven by the fifo almost_full port)
wire [31:0] fragment_x;        //xy fifo output, current fragment x coordinate
wire [31:0] fragment_y;        //xy fifo output, current fragment y coordinate
wire [31:0] fragment_z;        //xy fifo output, current fragment z coordinate
wire [31:0] fragment_fog_x;    //xy fifo output, current fragment x fog coordinate
wire [31:0] fragment_fog_y;    //xy fifo output, current fragment y fog coordinate
wire [31:0] fragment_fog_z;    //xy fifo output, current fragment z fog coordinate
wire [7:0]  fragment_r;        //xy fifo output, current fragment color r
wire [7:0]  fragment_g;        //xy fifo output, current fragment color g
wire [7:0]  fragment_b;        //xy fifo output, current fragment color b
wire [7:0]  fragment_a;        //xy fifo output, current fragment color a

//-----------------------------
//texel address / data between u_texel_generate and u_texture_cache
//-----------------------------
wire [CACHE_ADDR_BITS-1:0] block_addr0;           //texel 0 cache line address, for bilinear filter
wire [6:0]                 in_block_addr0;        //texel 0 address inside the cache line
wire [CACHE_ADDR_BITS-1:0] block_addr1;           //texel 1 cache line address, for bilinear filter
wire [6:0]                 in_block_addr1;        //texel 1 address inside the cache line
wire [CACHE_ADDR_BITS-1:0] block_addr2;           //texel 2 cache line address, for bilinear filter
wire [6:0]                 in_block_addr2;        //texel 2 address inside the cache line
wire [CACHE_ADDR_BITS-1:0] block_addr3;           //texel 3 cache line address, for bilinear filter
wire [6:0]                 in_block_addr3;        //texel 3 address inside the cache line
wire [31:0]                texel_32_0;            //texel 0 value
wire [31:0]                texel_32_1;            //texel 1 value
wire [31:0]                texel_32_2;            //texel 2 value
wire [31:0]                texel_32_3;            //texel 3 value
wire                       texel_32_en;           //texel value valid
wire [1:0]                 addr_fifo_w_en;        //bit[1]: bilinear filter enable (cache filt_en), bit[0]: address valid (cache rd_en)
wire                       addr_fifo_almost_full; //cache_busy output of the texture cache
wire                       pixel_gen_busy;        //busy from u_texel_generate to the texture cache (rec_busy)

wire                       fragment_out_valid_wire; //ungated output valid from u_texel_generate

//-----------------------------
//top level handshake
//-----------------------------
//upstream must stop sending fragments as soon as either fifo is almost full
assign fragment_tex_shading_busy = uv_fifo_prog_full || xy_fifo_prog_full;
//output valid is suppressed while the downstream stage is busy
assign fragment_out_valid = fragment_out_valid_wire && (~busy_in);


//transform s t according to the matrix
uv_translate #(
    .UV_PRECISION(UV_PRECISION)
)u_uv_translate (
    .SCAN_mode(1'b0),             //tied low
    .clk(clk),                    //input clock
    .rst_n(rst_n),                //input reset, low active
    .bte_end(1'b0),               //input tile end, tied low here
    .x(s),                        //input s coordinate, 1.15.16
    .y(t),                        //input t coordinate, 1.15.16
    .xy_en(fragment_valid),       //input st valid
    .busy_in(tex_uv_cal_busy),    //input busy, from u_tex_uv_cal
    .matrix_A(matrix_A),          //input matrix A, 1.15.16
    .matrix_B(matrix_B),          //input matrix B, 1.15.16
    .matrix_C(matrix_C),          //input matrix C, 1.15.16
    .matrix_D(matrix_D),          //input matrix D, 1.15.16
    .matrix_E(matrix_E),          //input matrix E, 1.15.16
    .matrix_F(matrix_F),          //input matrix F, 1.15.16
    .matrix_G(matrix_G),          //input matrix G, 1.15.16
    .matrix_H(matrix_H),          //input matrix H, 1.15.16
    .u(s_trans),                  //output s_trans, 1.3.28
    .v(t_trans),                  //output t_trans, 1.3.28
    .uv_en(st_trans_en)           //output valid
);

//transform st to uv
tex_uv_cal#(
    .UV_PRECISION(UV_PRECISION)
)u_tex_uv_cal(
    .clk(clk),                    //input clock
    .rst_n(rst_n),                //input reset, low active
    .busy_in(uv_fifo_prog_full),  //input busy, uv fifo almost full
    .s(s_trans),                  //input s_trans, 1.3.28
    .t(t_trans),                  //input t_trans, 1.3.28
    .wrap_mode_s(wrap_mode_s),    //s direction wrap mode, support: 2'b00 repeat; 2'b01 clamp to edge; 2'b10 clamp to border; 2'b11 mirrored repeat
    .wrap_mode_t(wrap_mode_t),    //t direction wrap mode, support: 2'b00 repeat; 2'b01 clamp to edge; 2'b10 clamp to border; 2'b11 mirrored repeat
    .cal_en(st_trans_en),         //input valid
    .tex_width(tex_width[23:0]),  //texture width, low 24 bits only (up to 2^24-1)
    .tex_height(tex_height[23:0]),//texture height, low 24 bits only (up to 2^24-1)
    .u(u),                        //output u 24.8
    .v(v),                        //output v 24.8
    .valid(uv_fifo_wr),           //output valid, used as uv fifo write
    .busy_out(tex_uv_cal_busy)    //output busy
);

//data fifo for fragment data
//AFULL is not overridden here, so the fifo module's default almost-full threshold applies
fifo #(
    .DWIDTH(32+32+32+32+32+32+8+8+8+8), //x,y,z,fog_x,fog_y,fog_z (32 bits each) + r,g,b,a (8 bits each)
    .DSIZE(7)//8
)u_xy_fifo(
    .SCAN_mode(1'b0),                                                                                                                       //tied low
    .datain({x,y,z,fog_x,fog_y,fog_z,r,g,b,a}),                                                                                             //input data
    .rd(xy_fifo_rd),                                                                                                                        //fifo read
    .wr(fragment_valid),                                                                                                                    //fifo write
    .rst_n(rst_n),                                                                                                                          //input reset, low active
    .clk(clk),                                                                                                                              //input clock
    .dataout({fragment_x,fragment_y,fragment_z,fragment_fog_x,fragment_fog_y,fragment_fog_z,fragment_r,fragment_g,fragment_b,fragment_a}),  //output data, same field order as datain
    .full(xy_fifo_full),                                                                                                                    //fifo full
    .empty(xy_fifo_empty),                                                                                                                  //fifo empty
    .almost_full(xy_fifo_prog_full)                                                                                                         //fifo almost full
);

//uv fifo
fifo #(
    .DWIDTH(64),                        //u and v, 32 bits each
    .DSIZE(3),//7
    .AFULL(2**3 - 3)                    //almost-full threshold; NOTE(review): presumably 3 entries below a depth of 2**DSIZE - confirm against fifo
)u_uv_fifo(
    .SCAN_mode(1'b0),                   //tied low
    .datain({u,v}),                     //input data
    .rd(uv_fifo_rd),                    //fifo read
    .wr(uv_fifo_wr),                    //fifo write
    .rst_n(rst_n),                      //input reset, low active
    .clk(clk),                          //input clock
    .dataout({u_out,v_out}),            //output data
    .full(),                            //fifo full, left unconnected
    .empty(uv_fifo_empty),              //fifo empty
    .almost_full(uv_fifo_prog_full)     //fifo almost full
);

//texel generate
shading_texture_gen u_texel_generate (
    .rst_n(rst_n),                                     //input reset, low active
    .clk(clk),                                         //input clock
    .busy_in(busy_in),                                 //input busy
    .uv_fifo_empty(uv_fifo_empty),                     //input uv fifo empty
    .xy_fifo_empty(xy_fifo_empty),                     //input xy fifo empty
    .u(u_out),                                         //input u from uv fifo, 24.8
    .v(v_out),                                         //input v from uv fifo, 24.8
    .uv_fifo_rd(uv_fifo_rd),                           //output uv fifo read
    .tex_width(tex_width),                             //texture width, full 32-bit port value
    .tex_height(tex_height),                           //texture height, full 32-bit port value
    .tex_addr(tex_addr),                               //current texture address in memory
    .block_addr0    (block_addr0    ),                 //texel 0 cache line address, for bilinear filter
    .in_block_addr0 (in_block_addr0 ),                 //texel 0 address inside the cache line
    .block_addr1    (block_addr1    ),                 //texel 1 cache line address, for bilinear filter
    .in_block_addr1 (in_block_addr1 ),                 //texel 1 address inside the cache line
    .block_addr2    (block_addr2    ),                 //texel 2 cache line address, for bilinear filter
    .in_block_addr2 (in_block_addr2 ),                 //texel 2 address inside the cache line
    .block_addr3    (block_addr3    ),                 //texel 3 cache line address, for bilinear filter
    .in_block_addr3 (in_block_addr3 ),                 //texel 3 address inside the cache line
    .addr_fifo_w_en(addr_fifo_w_en),                   //bit[1]: bilinear filter enable, bit[0]: address valid
    .addr_fifo_almost_full(addr_fifo_almost_full),     //cache_busy from the texture cache
    .x_in(fragment_x),                                 //from xy fifo, input fragment x coordinate
    .y_in(fragment_y),                                 //from xy fifo, input fragment y coordinate
    .z_in(fragment_z),                                 //from xy fifo, input fragment z coordinate
    .fog_x_in(fragment_fog_x),                         //from xy fifo, input fragment x fog coordinate
    .fog_y_in(fragment_fog_y),                         //from xy fifo, input fragment y fog coordinate
    .fog_z_in(fragment_fog_z),                         //from xy fifo, input fragment z fog coordinate
    .r_in    (fragment_r    ),                         //from xy fifo, input fragment color component r
    .g_in    (fragment_g    ),                         //from xy fifo, input fragment color component g
    .b_in    (fragment_b    ),                         //from xy fifo, input fragment color component b
    .a_in    (fragment_a    ),                         //from xy fifo, input fragment color component a
    .xy_fifo_rd(xy_fifo_rd),                           //output xy fifo read
    .texel_32_en(texel_32_en),                         //texel value valid
    .texel_32_0(texel_32_0),                           //texel 0 value
    .texel_32_1(texel_32_1),                           //texel 1 value
    .texel_32_2(texel_32_2),                           //texel 2 value
    .texel_32_3(texel_32_3),                           //texel 3 value
    .r_out_gen(r_tex_out),                             //output texel r value
    .g_out_gen(g_tex_out),                             //output texel g value
    .b_out_gen(b_tex_out),                             //output texel b value
    .a_out_gen(a_tex_out),                             //output texel a value
    .x_out(x_out),                                     //output fragment x coordinate
    .y_out(y_out),                                     //output fragment y coordinate
    .z_out(z_out),                                     //output fragment z coordinate
    .fog_x_out(fog_x_out),                             //output fragment x fog coordinate
    .fog_y_out(fog_y_out),                             //output fragment y fog coordinate
    .fog_z_out(fog_z_out),                             //output fragment z fog coordinate
    .r_out    (r_out    ),                             //output fragment color component r
    .g_out    (g_out    ),                             //output fragment color component g
    .b_out    (b_out    ),                             //output fragment color component b
    .a_out    (a_out    ),                             //output fragment color component a
    .pixel_out_en(fragment_out_valid_wire),            //output valid, gated with ~busy_in at the top level
    .busy_out(pixel_gen_busy)                          //output busy, to the texture cache
);

//texel cache
texture_cache u_texture_cache(
    .SCAN_mode(1'b0),                                  //tied low
    .clk(clk),                                         //input clock
    .arst_n(rst_n),                                    //input reset, low active
    .rec_busy(pixel_gen_busy),                         //input receiver busy, from u_texel_generate
    .GPE_start(1'b0),                                  //input cache tag clear, tied low here
    .block_addr0(block_addr0),                         //texel 0 cache line address, for bilinear filter
    .in_block_addr0(in_block_addr0),                   //texel 0 address inside the cache line
    .block_addr1(block_addr1),                         //texel 1 cache line address, for bilinear filter
    .in_block_addr1(in_block_addr1),                   //texel 1 address inside the cache line
    .block_addr2(block_addr2),                         //texel 2 cache line address, for bilinear filter
    .in_block_addr2(in_block_addr2),                   //texel 2 address inside the cache line
    .block_addr3(block_addr3),                         //texel 3 cache line address, for bilinear filter
    .in_block_addr3(in_block_addr3),                   //texel 3 address inside the cache line
    .textype(4'b0101),                                 //textype, fixed RGBA 8888
    .rd_en(addr_fifo_w_en[0]),                         //address valid
    .filt_en(addr_fifo_w_en[1]),                       //bilinear enable
    .texel_32_0(texel_32_0),                           //texel 0 value
    .texel_32_1(texel_32_1),                           //texel 1 value
    .texel_32_2(texel_32_2),                           //texel 2 value
    .texel_32_3(texel_32_3),                           //texel 3 value
    .texel_32_en(texel_32_en),                         //texel value valid
    .cache_busy(addr_fifo_almost_full),                //cache busy, back-pressure to u_texel_generate
    .src(src[0:3])                                     //memory bus, all four interface instances
);



endmodule
